import pandas as pd
import numpy as np

# Load the disease -> symptom reference table, then the two disease files
# (both stored as GB18030-encoded CSV) and stack them into one frame.
df_data = pd.read_excel('all-疾病-症状.xlsx')
df1, df2 = [
    pd.read_csv(csv_path, encoding='gb18030')
    for csv_path in ('test1.csv', 'test2.csv')
]
df3 = pd.concat([df1, df2])

# Positive samples: every (disease, symptom) pair that exists in the
# reference table for a disease present in df3, labelled '相关'.
df4 = pd.merge(
    df3,
    df_data,
    how='inner',
    left_on='疾病名称',
    right_on='疾病',
)
df4['关系'] = '相关'
df5 = df4.loc[:, ['疾病', '症状', '关系', '简介']]
df5.to_csv('all_data1.csv', index=False)
# df6 = pd.read_csv('all_data1.csv')
# df6['关系']='不相关'
# Negative samples: pair every row of df3 with symptoms drawn at random from
# reference rows that belong to a *different* disease, labelled '不相关'.
#
# NOTE(review): columns are read by position -- this assumes column 1 of df3
# is the disease name and column 2 its description; confirm against the CSVs.
# NOTE(review): candidates are filtered by disease only, so a symptom that
# this disease shares with another one can still be drawn and end up labelled
# both '相关' and '不相关'; confirm whether that is acceptable.
bxg_list = []
for illname, jj in zip(df3.iloc[:, 1], df3.iloc[:, 2]):
    candidates = df_data.loc[df_data['疾病'] != illname, '症状']
    # Series.sample raises ValueError when asked for more rows than exist
    # (without replacement), so cap the request at the candidate count.
    zzdf = candidates.sample(min(100, len(candidates)))
    bxg_list.extend([illname, zz, '不相关', jj] for zz in zzdf)

df6 = pd.DataFrame(bxg_list, columns=['疾病', '症状', '关系', '简介'])
df6.to_csv('all_data2.csv', index=False)

# Final dataset: positives followed by negatives, tab-separated, no header row.
df7 = pd.concat([df5, df6])
df7.to_csv('all_data.txt', sep='\t', header=False, index=False)